
********************************************************************************
* code: media coverage and the cost of equity capital around the world

/*** We are unable to share the data due to permission restrictions.
     To enhance clarity, we have simplified the code.
     To help users follow the code, we provide pseudo-data sets.
     Note that these pseudo-data sets are only a small sample intended to illustrate the regression analysis; the company codes (dscd) in them have been anonymized through masking. ***/

********************************************************************************
// Set the working directory: every data file used or saved below is referenced by a relative name.
// NOTE(review): this absolute path is machine-specific -- adjust it before running the script.
cd "/Users/Desktop/Media and ICOC_FM"

********************************************************************************
//1. ICOC calculation
********************************************************************************
/*
S1: download stock price and analyst forecast data from I/B/E/S and inflation data from Datastream
S2: merge together and run Matlab code to calculate ICOC (below are core Matlab codes)

/*
%% function cal (solve the function)
parfor i=1:len

valid_r_ct(i)={double(solve(sum((x(i,:)-r_ct*bv(i,1:5)).*([1/(1+r_ct),1/(1+r_ct)^2,1/(1+r_ct)^3,1/(1+r_ct)^4,1/(1+r_ct)^5]),2)...
    +bv(i,1)+((x(i,5)-r_ct*bv(i,5)).*(1+g(i,:)))./((r_ct-g(i,:))*(1+r_ct)^5)-p(i,:), r_ct,'MaxDegree', 6))'};
end

parfor i=1:len

valid_r_gls(i)={double(solve(sum((x(i,1:3)-r_gls.*bv(i,1:3)).*([1/(1+r_gls),1/(1+r_gls)^2,1/(1+r_gls)^3]),2)...
    +bv(i,1)+((x(i,4)-r_gls*bv(i,4)))./(r_gls*(1+r_gls)^3)-p(i,:), r_gls, 'MaxDegree', 4))'};

end

r_oj=[((glt.*p+d)-sqrt((glt.*p+d).^2+4.*p.*x.*(gst-glt)))./(2*p),((glt.*p+d)+sqrt((glt.*p+d).^2+4.*p.*x.*(gst-glt)))./(2*p)];

r_peg=[(d-sqrt(d.^2-4.*p.*(x(:,1)-x(:,2))))./(2*p),(d+sqrt(d.^2-4.*p.*(x(:,1)-x(:,2))))./(2*p)];
*/

S3: import ICOC data file (in Excel) to Stata, save as "ICOC_ok.dta"
*/
// Load the ICOC estimates imported from Excel and normalise all variable names to lower case
use ICOC_ok, clear
rename _all, lower

// Variables retained:
//   ticker, dscd, code   - I/B/E/S, Datastream, and Worldscope identifiers
//   market, fccode       - country and country code
//   icb1-icb4            - industry codes
//   r_ct r_gls r_oj r_peg - the four ICOC estimates
local id_vars  ticker dscd code market fccode icb1 icb2 icb3 icb4 year
local icoc_raw r_ct r_gls r_oj r_peg
keep `id_vars' `icoc_raw'

// Composite measure: simple average of the four estimates
// (missing whenever any one of the four is missing)
generate r_hl = (r_ct + r_gls + r_oj + r_peg)/4

// One-year lags within firm; the names of the lagged variables are collected for winsorizing
tsset dscd year
local icoc_all `icoc_raw' r_hl
local icoc_lags
foreach m of local icoc_all {
	generate `m'_lag = L.`m'
	local icoc_lags `icoc_lags' `m'_lag
}

// Winsorize levels and lags at the 1st and 99th percentiles, overwriting the variables
winsor2 `icoc_all' `icoc_lags', cuts(1 99) replace

save ICOC_ok_update, replace
********************************************************************************

********************************************************************************
//2. Ravenpack calculation
********************************************************************************
use Ravenpack_100, clear //only contain news articles with a relevance score of 100

// ---- newscov: number of articles per entity-year that are NOT press releases
generate flag_press = (news_type!="PRESS-RELEASE")
bysort rp_entity_id year: egen newscov = total(flag_press)

// ---- newscov_firm: number of press releases per entity-year (PressRelease in Panel E of Table 5)
generate flag_firm = (news_type=="PRESS-RELEASE")
bysort rp_entity_id year: egen newscov_firm = total(flag_firm)

// Our study focuses on media-initiated news articles, so press releases
// (which originate from firms) are dropped before the remaining measures are built.
keep if flag_press==1

// ---- entity-year article counts by category: newscov_<k> sums the indicator flag_<k>
//   reputable : reputable international business news outlets
//               (The Wall Street Journal, Financial Times, Dow Jones Newswires, Reuters)
//   other     : all other business news providers
//   earnings  : earnings-related news (a firm's earnings or revenues)
//   equity    : news about dividends, equity actions, or stock prices
//   else      : all other categories of news
//   breaking  : first article released specifically to a categorized event (ens=100)
//   repeated  : repeated news articles
//   full      : full articles
//   brief     : brief articles
//   positive  : positive news articles (based on the ESS provided by Ravenpack)
//   negative  : negative news articles (based on the ESS provided by Ravenpack)
foreach k in reputable other earnings equity else breaking repeated full brief positive negative {
	bysort rp_entity_id year: egen newscov_`k' = total(flag_`k')
}

// ---- sentiment: entity-year mean of the rescaled ESS.
// The original ESS scores are rescaled to [-1, 1]: positive (negative) values indicate
// positive (negative) news and 0 denotes neutral news.
bysort rp_entity_id year: egen sentiment = mean(ess_rescaled)

// Keep one row per entity-year and declare the panel
duplicates drop rp_entity_id year, force
tsset rp_entity_id year

// Count variables enter in logs: ln(1 + count)
local count_vars newscov newscov_firm newscov_reputable newscov_other newscov_earnings newscov_equity newscov_else newscov_breaking newscov_repeated newscov_full newscov_brief newscov_positive newscov_negative
foreach c of local count_vars {
	replace `c' = ln(1+`c')
}

// One-year lags of the (logged) counts and of sentiment
local news_vars `count_vars' sentiment
local news_lags
foreach m of local news_vars {
	generate `m'_lag = L.`m'
	local news_lags `news_lags' `m'_lag
}

// Winsorize levels and lags at the 1st and 99th percentiles
winsor2 `news_vars' `news_lags', cuts(1 99) replace

save newscov_ok, replace
********************************************************************************

********************************************************************************
//3. Controls in baseline regressions
********************************************************************************
// NOTE: the command that loads the input data for this section is not part of this simplified
// script. The code below reads ri_monthly together with the Worldscope, I/B/E/S, and
// country-level variables referenced in the comments.

*** rvar
// Standard deviation of monthly returns within a firm-year, computed only when
// all 12 monthly returns are non-missing.
bysort dscd year: egen nonmiss_count = sum(!missing(ri_monthly))
//"ri_monthly" denotes monthly stock returns derived and calculated from Datastream
bysort dscd year: egen rvar = sd(ri_monthly) if nonmiss_count==12

duplicates drop dscd year, force //convert data to firm-year level

// Declare the firm-year panel explicitly. The L. and F. operators used below need it,
// and this section did not set it anywhere before using them.
tsset dscd year

*** btm
gen btm = item3501 / item8001
//"item3501"(book value of equity) and "item8001"(market value of equity) are from Worldscope

*** lev
gen lev = item3251 / item2999
//"item3251"(long-term debt) and "item2999"(total assets) are from Worldscope

*** size
gen size = ln(item7230/1000000)
//"item7230"(total assets in U.S.$) is from Worldscope

*** optimism
gen optimism = (epshat_mean - eps_actual) / aps_lag
//"epshat_mean" is the one-year-ahead analyst consensus annual earnings per share forecast, calculated from I/B/E/S
//"eps_actual" is the actual earnings per share, derived from I/B/E/S
//"aps_lag" is the lagged assets per share, calculated from I/B/E/S

*** disp
gen disp = epshat_sd / epshat_mean
//"epshat_sd" is the standard deviation of one-year-ahead analyst earnings per share forecasts, calculated from I/B/E/S

*** infl
gen infl = inflation //"inflation" denotes country-level inflation rate

*** lngdpc
gen lngdpc = ln(gdpc) //"gdpc" denotes a country's real GDP per capita(in U.S.$)

// One-year lags of the controls; inflation instead enters as a one-year lead (infl_ahead)
foreach v of var rvar btm lev size optimism disp lngdpc {
	gen `v'_lag = l.`v'
}

gen infl_ahead = f.infl

// Winsorize levels, lags, and the inflation lead at the 1st and 99th percentiles
winsor2 rvar btm lev size optimism disp infl lngdpc rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag, cuts(1 99) replace

save controls_baseline, replace
********************************************************************************

********************************************************************************
//4. other variables
********************************************************************************
*** 4.1. press freedom variables(derived from Freedom House)
//   legal     : laws and regulations that influence news content, and the extent to which
//               they are enforced in practice
//   political : degree of political pressure and control on news content
//   economic  : degree of economic influence over news content
//   overall   : sum of the three component indexes
// The scores are scaled to lie between 0 and 1 and multiplied by -1, so that a higher value
// of a press freedom index reflects greater press freedom in the reporting country.

// One-year lags of the four indexes
local freedom_idx legal political economic overall
local freedom_lags
foreach idx of local freedom_idx {
	generate `idx'_lag = L.`idx'
	local freedom_lags `freedom_lags' `idx'_lag
}

// Winsorize levels and lags at the 1st and 99th percentiles
winsor2 `freedom_idx' `freedom_lags', cuts(1 99) replace

*** 4.2. other disclosure mechanism variables
// Builds analyst following, institutional ownership, management-forecast measures, and the
// IFRS and Twitter indicators, then lags and winsorizes them.

// Log of one plus the number of analysts following the firm
gen analyst = ln(1+analyst_follow)
//"analyst_follow" counts the number of analysts following a firm in a year(from I/B/E/S)

// Institutional ownership as a fraction of shares outstanding
gen io = shares_io / shares_outstanding
//"shares_io" denotes shares held by institutional investors for a firm-year(from FactSet)
//"shares_outstanding" denotes total shares outstanding for a firm-year

// Log of one plus the number of management earnings forecasts
gen manager_forecast = ln(1+earnings_forecast)
//"earnings_forecast" counts the number of management earnings forecasts in a year(from Capital IQ & I/B/E/S Guidance)

// Management forecast error scaled by lagged assets per share
gen manager_optimism = (eps_manager_forecast - eps_actual)/aps_lag
//"eps_manager_forecast" is the management forecast of annual earnings per share
//"eps_actual" is the actual earnings per share
//"aps_lag" is the lagged assets per share

// NOTE(review): the condition on the next line is written as a prose description, not as an
// executable Stata expression; replace it with the actual test on item7536 before running.
gen ifrs = 1 if item7536 indicates that the firm adopts IFRS in a year
replace ifrs = 0 if ifrs==.
//"item7536"(accounting standard followed) is from Worldscope

// NOTE(review): the condition on the next line is likewise a prose placeholder for a
// hand-collected indicator; replace it with the actual variable or condition before running.
gen twitter = 1 if a firm has a Twitter account //hand-collect from https://twitter.com
replace twitter=0 if twitter==.

// One-year lags (twitter is not lagged here)
foreach v of var analyst io manager_forecast manager_optimism ifrs {
	gen `v'_lag = l.`v'
}

// Winsorize the continuous measures at the 1st and 99th percentiles; the ifrs and twitter
// indicators are left out of the list
winsor2 analyst io manager_forecast manager_optimism analyst_lag io_lag manager_forecast_lag manager_optimism_lag, cuts(1 99) replace

*** 4.3 other variables
// Builds earnings-quality, auditor, board, investment, debt, and implied-volatility variables,
// lags three of them, winsorizes the continuous ones, and saves the result.

// NOTE(review): "discretionary accruals" on the next line is a prose placeholder (a Stata
// variable name cannot contain a space); substitute the actual variable before running.
gen absDA = abs(discretionary accruals)
//"discretionary accruals" is estimated based on the modified Jones model(Dechow et al., 1995)
//Dechow, P. M., Sloan, R. G., & Sweeney, A. P. 1995. Detecting earnings management. The Accounting Review, 70(2): 193-225.

// NOTE(review): the condition on the next line is a prose placeholder, not an executable
// Stata expression; replace it with the actual auditor test before running.
gen big4=1 if a firm is audited by a Big 4 auditor //Eikon
replace big4=0 if big4==.

// Share of independent directors on the board
gen inddir = ind_director / all_director
//"ind_director" counts the number of independent directors on the board(BoardEx)
//"all_director" counts the number of directors on the board

// (item1201 + item4601 - item8431) scaled by lagged total assets
gen investment = (item1201 + item4601 - item8431) / item2999_lag
//"item1201" denotes R&D expenditure(Worldscope)
//"item4601" denotes capital expenditures(includes acquisitions)
//"item8431" denotes net sales to gross fixed assets
//"item2999_lag" denotes lagged total assets

// Total debt scaled by market value of equity
gen debt = item3255 / item8001
//"item3255" denotes total debt
//"item8001" denotes market value of equity

gen implied_vol = Implied_Volatility //derived from OptionMetrics

// One-year lags of absDA, big4, and inddir only
foreach v of var absDA big4 inddir {
	gen `v'_lag = l.`v'
}

// Winsorize the continuous variables at the 1st and 99th percentiles; the big4 indicator
// and its lag are left out of the list
winsor2 absDA inddir investment debt implied_vol absDA_lag inddir_lag, cuts(1 99) replace

save controls_other, replace 
********************************************************************************

********************************************************************************
//5. merge dataset and run regressions
********************************************************************************
/*
S1: merge the following datasets: ICOC_ok_update, newscov_ok, controls_baseline, and controls_other
S2: drop financial and utility firms (the four-digit ICB codes from 7535 to 8995)
S3: winsorize continuous variables
S4: save and run regressions
*/

/// Below, we will employ the pseudo-data sets to demonstrate the regressions, in which the company codes(dscd) have been anonymized through masking.

****** Table 1 ******
use pseudo_dataset, clear

// Column 1: number of observations per country
logout, save(sum1) excel replace: tab market

// Column 2: number of distinct firms per country.
// The data are reduced to one row per market-dscd pair on a temporary copy and restored afterwards.
preserve
duplicates drop market dscd, force
logout, save(sum2) excel replace: tab market
restore

// Columns 3-5: country means of lagged news coverage, r_gls, and r_hl
logout, dec(4) save(sum3) excel replace: tabstat newscov_lag r_gls r_hl, stats(mean) by(market)

****** Table 2 ******
*** Panel A: summary statistics
use pseudo_dataset, clear

logout, dec(4) save(sum4) excel replace: tabstat r_gls r_ct r_peg r_oj r_hl newscov_lag rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag overall_lag legal_lag political_lag economic_lag sentiment_lag analyst_lag io_lag manager_forecast_lag manager_optimism_lag ifrs_lag twitter, stats(n mean sd p5 p25 median p75 p95) column(statistics)

*** Panel B: difference in ICOC between high- and low-coverage firms
// Split firms into two groups by lagged news coverage within each country-year
bysort market year: astile high_low = newscov_lag, nq(2)

// Two-sample t-tests (unequal variances) for each ICOC measure
foreach y of varlist r_gls r_ct r_peg r_oj r_hl {
	ttest `y', by(high_low) unequal
}

*** Panel C: pairwise correlations
// The ICOC measures listed here are the same five as in Panels A and B
// (the list previously repeated r_ct and left out r_oj).
logout, dec(2) save(sum5) excel replace: pwcorr r_gls r_ct r_peg r_oj r_hl newscov_lag rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag overall_lag legal_lag political_lag economic_lag

****** Table 3 ******
use pseudo_dataset, clear

// Dependent variables: the five ICOC measures (kept as a global; Table 4 also refers to $icoc)
global icoc "r_gls r_ct r_peg r_oj r_hl"

// Baseline right-hand side: lagged news coverage plus the baseline controls
local baseline newscov_lag rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag

// $tabb1 holds the outreg2 prefix: the first regression creates table3, later ones append to it
global tabb1 "outreg2 using table3, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach dep of varlist $icoc {
	$tabb1 xi: reghdfe `dep' `baseline', absorb(icb4 market year) cluster(dscd)
	global tabb1 "outreg2 using table3, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

*** Oster's (2019) tests, taking r_hl for example
use pseudo_dataset, clear

// Explicit industry, country, and year dummies, entered as regressors in the OLS below
tab icb4, gen(indu)
tab market, gen(cty)
tab year, gen(yr)

xi: reg r_hl newscov_lag rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag indu* cty* yr*, cluster(dscd)

// Rmax is set to 1.3 times the R-squared of the controlled regression, capped at 1:
// an R-squared cannot exceed 1, so without the cap a regression with R-squared above
// about 0.77 would pass an infeasible rmax() to psacalc.
global r = min(1, e(r2) * 1.3)

// Bias-adjusted coefficient under delta = 1, and the delta for which the coefficient equals 0
psacalc beta newscov_lag, delta(1) rmax($r)
psacalc delta newscov_lag, beta(0) rmax($r)

****** Table 4 ******
// Baseline controls shared by the three panels
local controls rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag

*** Panel A: firm and year fixed effects
use pseudo_dataset, clear

global tabb1 "outreg2 using table4_a, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach dep of varlist $icoc {
	$tabb1 xi: reghdfe `dep' newscov_lag `controls', absorb(dscd year) cluster(dscd)
	global tabb1 "outreg2 using table4_a, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

*** Panel B: news coverage regressed on each lagged ICOC measure
global icoc_lag "r_gls_lag r_ct_lag r_peg_lag r_oj_lag r_hl_lag"

global tabb1 "outreg2 using table4_b, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach lagged of varlist $icoc_lag {
	$tabb1 xi: reghdfe newscov `lagged' `controls', absorb(icb4 market year) cluster(dscd)
	global tabb1 "outreg2 using table4_b, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

*** Panel C: instrumental-variable regressions
// "distance" is the instrumental variable for newscov_lag; it is manually collected and
// calculated with reference to Dai et al.(2015)
global tabb1 "outreg2 using table4_c, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach dep of varlist $icoc {
	$tabb1 xi: ivreghdfe `dep' (newscov_lag = distance) `controls', absorb(icb4 market year) cluster(dscd)
	global tabb1 "outreg2 using table4_c, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

****** Table 5 ******
// Baseline controls and the fixed-effect / clustering options shared by Panels A-C, E, and F
local controls rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag
local fe_opts  absorb(icb4 market year) cluster(dscd)

*** Panel A: subsamples market!="United States" and countries_eff_media==0
use pseudo_dataset, clear

global tabb1 "outreg2 using table5_a, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach dep of varlist r_gls r_hl {
	$tabb1 xi: reghdfe `dep' newscov_lag `controls' if market!="United States", `fe_opts'
	global tabb1 "outreg2 using table5_a, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

foreach dep of varlist r_gls r_hl {
	$tabb1 xi: reghdfe `dep' newscov_lag `controls' if countries_eff_media==0, `fe_opts'
}

*** Panel B: developed (1) versus other (0) countries
//"developed" identifies developed countries as classified in the World Factbook
global tabb1 "outreg2 using table5_b, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach grp in 1 0 {
	foreach dep of varlist r_gls r_hl {
		$tabb1 xi: reghdfe `dep' newscov_lag `controls' if developed==`grp', `fe_opts'
		global tabb1 "outreg2 using table5_b, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
	}
}

*** Panel C: English-speaking (1) versus other (0) countries
//"english_speaking" refers to countries where English is recognized as the official language
global tabb1 "outreg2 using table5_c, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach grp in 1 0 {
	foreach dep of varlist r_gls r_hl {
		$tabb1 xi: reghdfe `dep' newscov_lag `controls' if english_speaking==`grp', `fe_opts'
		global tabb1 "outreg2 using table5_c, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
	}
}

*** Panel D: country-year level regressions
// Country-year means of the dependent variables and regressors (suffix _c)
local firm_level r_gls r_hl newscov_lag `controls'
local country_level
foreach x of local firm_level {
	bysort market year: egen `x'_c = mean(`x')
	local country_level `country_level' `x'_c
}

// One row per country-year, then winsorize the means at the 1st and 99th percentiles
duplicates drop market year, force

winsor2 `country_level', cuts(1 99) replace

// Right-hand side: the _c versions of news coverage and the controls
local rhs_c
foreach x in newscov_lag `controls' {
	local rhs_c `rhs_c' `x'_c
}

global tabb1 "outreg2 using table5_d, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach dep of varlist r_gls_c r_hl_c {
	$tabb1 xi: reghdfe `dep' `rhs_c', absorb(market year) cluster(market)
	global tabb1 "outreg2 using table5_d, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

*** Panel E: controlling for firm-initiated press releases
//"newscov_firm_lag": PressRelease in Panel E of Table 5
use pseudo_dataset, clear

global tabb1 "outreg2 using table5_e, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach dep of varlist r_gls r_hl {
	$tabb1 xi: reghdfe `dep' newscov_lag newscov_firm_lag `controls', `fe_opts'
	global tabb1 "outreg2 using table5_e, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

*** Panel F: controlling for sentiment and other disclosure mechanisms
// The regressor order differs from the baseline list, so it is written out in full
local panel_f newscov_lag sentiment_lag analyst_lag optimism_lag io_lag manager_forecast_lag manager_optimism_lag ifrs_lag twitter rvar_lag btm_lag lev_lag size_lag disp_lag infl_ahead lngdpc_lag

global tabb1 "outreg2 using table5_f, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach dep of varlist r_gls r_hl {
	$tabb1 xi: reghdfe `dep' `panel_f', `fe_opts'
	global tabb1 "outreg2 using table5_f, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

****** Table 6 ******
*** Panel A
use pseudo_dataset, clear

global freedom "overall_lag legal_lag political_lag economic_lag"

// Explicit industry, country, and year dummies so that they can be interacted below
tab icb4, gen(indu)
tab market, gen(cty)
tab year, gen(yr)

local controls rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag

// Interact every regressor and every dummy with each lagged press freedom index.
// The product of <index>_lag and variable v is stored as <index>_v.
foreach v of varlist newscov_lag `controls' indu* cty* yr* {
	foreach idx in overall legal political economic {
		generate `idx'_`v' = `idx'_lag * `v'
	}
}

/***
Given the limited sample size in the pseudo_dataset, certain variables may be omitted due to multicollinearity. However, this issue is not observed in larger samples.
***/
// Columns 1-4: one fully interacted regression per index (overall, legal, political, economic).
// The regressor list is built in the order: newscov_lag, the index, their interaction,
// each control followed by its interaction, then each dummy set followed by its interactions.
global tabb1 "outreg2 using table6_a, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach idx in overall legal political economic {
	local rhs newscov_lag `idx'_lag `idx'_newscov_lag
	foreach c of local controls {
		local rhs `rhs' `c' `idx'_`c'
	}
	local rhs `rhs' indu* `idx'_indu* cty* `idx'_cty* yr* `idx'_yr*

	$tabb1 xi: reg r_gls `rhs', cluster(dscd)
	global tabb1 "outreg2 using table6_a, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

*** Panel B replicates the codes of Panel A, substituting the dependent variable r_gls with r_hl --> table6_b

****** Table 7 ******
*** Panel A
// No dataset is loaded here: this panel continues with the data in memory and uses the
// indu*, cty*, yr* dummies and the overall_* interaction terms that already exist in it.
generate overall_reputable_lag = overall_lag * newscov_reputable_lag
generate overall_other_lag = overall_lag * newscov_other_lag

local controls rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag
local fe_opts  absorb(icb4 market year) cluster(dscd)

// Tail of the interacted specifications: each control followed by its interaction with
// overall_lag, then each dummy set followed by its interactions
local interacted
foreach c of local controls {
	local interacted `interacted' `c' overall_`c'
}
local interacted `interacted' indu* overall_indu* cty* overall_cty* yr* overall_yr*

// Columns 1-3: reputable outlets, other outlets, and both together
global tabb1 "outreg2 using table7_a, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach news in "newscov_reputable_lag" "newscov_other_lag" "newscov_reputable_lag newscov_other_lag" {
	$tabb1 xi: reghdfe r_gls `news' `controls', `fe_opts'
	global tabb1 "outreg2 using table7_a, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

// Columns 4-6: the same three specifications, fully interacted with overall_lag
$tabb1 xi: reg r_gls newscov_reputable_lag overall_lag overall_reputable_lag `interacted', cluster(dscd)
$tabb1 xi: reg r_gls newscov_other_lag overall_lag overall_other_lag `interacted', cluster(dscd)
$tabb1 xi: reg r_gls newscov_reputable_lag newscov_other_lag overall_lag overall_reputable_lag overall_other_lag `interacted', cluster(dscd)

*** Panel B replicates the codes of Panel A, substituting the dependent variable r_gls with r_hl --> table7_b

****** Table 8 ******
*** Panel A
use pseudo_dataset, clear

local baseline newscov_lag rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag

//"US_penetration" is a dummy variable coded as 1 for high U.S. media penetration countries and 0 otherwise
// Columns 1-4 in order: (developed==0, pen==1), (developed==0, pen==0),
//                       (developed==1, pen==1), (developed==1, pen==0)
global tabb1 "outreg2 using table8_a, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach dev in 0 1 {
	foreach pen in 1 0 {
		$tabb1 xi: reghdfe r_gls `baseline' if developed==`dev' & US_penetration==`pen', absorb(icb4 market year) cluster(dscd)
		global tabb1 "outreg2 using table8_a, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
	}
}

// Tests of equality of the newscov_lag coefficient between Columns 1 and 2 (stored as m1, m2)
// and between Columns 3 and 4 (stored as m3, m4). Each pair is re-estimated by OLS with
// explicit industry, country, and year dummies and combined with suest.
foreach dev in 0 1 {
	local hi = 2*`dev' + 1
	local lo = 2*`dev' + 2

	xi: qui reg r_gls `baseline' i.icb4 i.market i.year if developed==`dev' & US_penetration==1
	est store m`hi'

	xi: qui reg r_gls `baseline' i.icb4 i.market i.year if developed==`dev' & US_penetration==0
	est store m`lo'

	suest m`hi' m`lo', cluster(dscd)

	test [m`hi'_mean]newscov_lag = [m`lo'_mean]newscov_lag
}

*** Panel B replicates the codes of Panel A, substituting the dependent variable r_gls with r_hl --> table8_b

****** Table 9 ******
*** Panel A: interactions with absDA_lag, analyst_lag, and big4_lag
use pseudo_dataset, clear

// Explicit industry, country, and year dummies so that they can be interacted below
tab icb4, gen(indu)
tab market, gen(cty)
tab year, gen(yr)

local controls rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag

// The product of <moderator>_lag and variable v is stored as <moderator>_v
foreach v of varlist newscov_lag `controls' indu* cty* yr* {
	foreach mod in absDA analyst big4 {
		generate `mod'_`v' = `mod'_lag * `v'
	}
}

/***
Given the limited sample size in the pseudo_dataset, certain variables may be omitted due to multicollinearity. However, this issue is not observed in larger samples.
***/
// Columns 1-6: for each moderator, r_gls first and then r_hl.
// Regressor order: newscov_lag, the moderator, their interaction, each control followed by
// its interaction, then each dummy set followed by its interactions.
global tabb1 "outreg2 using table9_a, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach mod in absDA analyst big4 {
	local rhs newscov_lag `mod'_lag `mod'_newscov_lag
	foreach c of local controls {
		local rhs `rhs' `c' `mod'_`c'
	}
	local rhs `rhs' indu* `mod'_indu* cty* `mod'_cty* yr* `mod'_yr*

	foreach dep of varlist r_gls r_hl {
		$tabb1 xi: reg `dep' `rhs', cluster(dscd)
		global tabb1 "outreg2 using table9_a, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
	}
}

*** Panel B: interactions with inddir_lag and io_lag
foreach v of varlist newscov_lag `controls' indu* cty* yr* {
	foreach mod in inddir io {
		generate `mod'_`v' = `mod'_lag * `v'
	}
}

// Columns 1-4: for each moderator, r_gls first and then r_hl
global tabb1 "outreg2 using table9_b, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach mod in inddir io {
	local rhs newscov_lag `mod'_lag `mod'_newscov_lag
	foreach c of local controls {
		local rhs `rhs' `c' `mod'_`c'
	}
	local rhs `rhs' indu* `mod'_indu* cty* `mod'_cty* yr* `mod'_yr*

	foreach dep of varlist r_gls r_hl {
		$tabb1 xi: reg `dep' `rhs', cluster(dscd)
		global tabb1 "outreg2 using table9_b, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
	}
}

****** Table 10 ******
*** Panel A
use pseudo_dataset, clear

global tabb1 "outreg2 using table10_a, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach v of var r_gls r_hl {
	$tabb1 xi: reghdfe `v' newscov_earnings_lag rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag, absorb(icb4 market year) cluster(dscd) //Column 1
	global tabb1 "outreg2 using table10_a, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
	$tabb1 xi: reghdfe `v' newscov_equity_lag rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag, absorb(icb4 market year) cluster(dscd) //Column 2
	$tabb1 xi: reghdfe `v' newscov_else_lag rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag, absorb(icb4 market year) cluster(dscd) //Column 3
	$tabb1 xi: reghdfe `v' newscov_earnings_lag newscov_equity_lag newscov_else_lag rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag, absorb(icb4 market year) cluster(dscd) //Column 4
}

*** Panel B
* Table 10, Panel B: breaking versus repeated news coverage, exported to table10_b.
* The local macros below live only for one run, so execute this block as a whole.
use pseudo_dataset, clear

* Control variables shared by every column.
local controls rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag

* Only the very first regression overwrites table10_b; tabb1 is switched to append right after it.
global tabb1 "outreg2 using table10_b, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach depvar of varlist r_gls r_hl {
	// Column 1: breaking news; Column 2: repeated news; Column 3: both together.
	foreach news in "newscov_breaking_lag" "newscov_repeated_lag" "newscov_breaking_lag newscov_repeated_lag" {
		$tabb1 xi: reghdfe `depvar' `news' `controls', absorb(icb4 market year) cluster(dscd)
		global tabb1 "outreg2 using table10_b, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
	}
}

*** Panel C
* Table 10, Panel C: full versus brief news coverage, exported to table10_c.
* The local macros below live only for one run, so execute this block as a whole.
use pseudo_dataset, clear

* Control variables shared by every column.
local controls rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag

* Only the very first regression overwrites table10_c; tabb1 is switched to append right after it.
global tabb1 "outreg2 using table10_c, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach depvar of varlist r_gls r_hl {
	// Column 1: full articles; Column 2: brief articles; Column 3: both together.
	foreach news in "newscov_full_lag" "newscov_brief_lag" "newscov_full_lag newscov_brief_lag" {
		$tabb1 xi: reghdfe `depvar' `news' `controls', absorb(icb4 market year) cluster(dscd)
		global tabb1 "outreg2 using table10_c, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
	}
}

*** Panel D
* Table 10, Panel D: positive versus negative news coverage, exported to table10_d.
* The local macros below live only for one run, so execute this block as a whole.
use pseudo_dataset, clear

* Control variables shared by every column.
local controls rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag

* Only the very first regression overwrites table10_d; tabb1 is switched to append right after it.
global tabb1 "outreg2 using table10_d, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach depvar of varlist r_gls r_hl {
	// Column 1: positive news; Column 2: negative news; Column 3: both together.
	foreach news in "newscov_positive_lag" "newscov_negative_lag" "newscov_positive_lag newscov_negative_lag" {
		$tabb1 xi: reghdfe `depvar' `news' `controls', absorb(icb4 market year) cluster(dscd)
		global tabb1 "outreg2 using table10_d, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
	}
}

****** Table 11 ******
*** Panel A
* Table 11, Panel A: investment regressed on positive/negative news coverage, exported to table11_a.
* The local macros below live only for one run, so execute this block as a whole.
use pseudo_dataset, clear

* Industry, country and year dummies (indu*, cty*, yr*).
tab icb4, gen(indu)
tab market, gen(cty)
tab year, gen(yr)

local controls rvar_lag btm_lag lev_lag size_lag optimism_lag disp_lag infl_ahead lngdpc_lag
local moderators absDA analyst big4 inddir io

* Interact every regressor and every dummy with each of the five moderators.
foreach v of varlist newscov_positive_lag newscov_negative_lag `controls' indu* cty* yr* {
	foreach m of local moderators {
		gen `m'_`v' = `m'_lag * `v'
	}
}

* Column 1: baseline with absorbed industry, country and year fixed effects (overwrites table11_a).
global tabb1 "outreg2 using table11_a, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
$tabb1 xi: reghdfe investment newscov_positive_lag newscov_negative_lag `controls', absorb(icb4 market year) cluster(dscd) //Column 1
global tabb1 "outreg2 using table11_a, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "

* Columns 2-6: fully interacted specification, one moderator per column
* (absDA, analyst, big4, inddir, io, in that order).
foreach m of local moderators {
	// News measures, the moderator itself, and the moderator x news interactions.
	local rhs newscov_positive_lag newscov_negative_lag `m'_lag `m'_newscov_positive_lag `m'_newscov_negative_lag
	// Each control is followed directly by its interaction with the moderator.
	foreach c of local controls {
		local rhs `rhs' `c' `m'_`c'
	}
	$tabb1 xi: reg investment `rhs' indu* `m'_indu* cty* `m'_cty* yr* `m'_yr*, cluster(dscd)
}

*** Panel B replicates the code of Panel A, replacing the dependent variable investment with debt --> table11_b

****** Table 12 ******
/*
S1: Sample Construction

We begin with the original editorial articles published by the business press. To ensure the independence of news events, we apply the following filters:
(1) Exclude articles published during firms' earnings announcement windows;
(2) Require a minimum interval of ten days between any two consecutive news articles about the same firm.

S2: Variable Definitions

(1) "news_release": A dummy variable that equals 1 on the news release day and the three subsequent days (i.e., t to t+3), and 0 on the three days preceding the news release (i.e., t-3 to t-1).
(2) "news_sentiment": The rescaled Event Sentiment Score (ESS) of the news article, assigned to the release day and the three subsequent days (t to t+3), and set to 0 on the three days preceding the news release (t-3 to t-1).

S3: Empirical Analysis
We merge the news data with implied volatility data and conduct regression analyses
*/
* Table 12: implied volatility around news releases, exported to table12.
* The local macro below lives only for one run, so execute this block as a whole.
use pseudo_dataset2, clear

/***
Because the pseudo data set is small, some variables may be omitted due to multicollinearity.
This issue is not observed in larger samples.
***/
local rhs news_release news_sentiment analyst_lag optimism_lag io_lag manager_forecast_lag manager_optimism_lag ifrs_lag twitter rvar_lag btm_lag lev_lag size_lag disp_lag infl_ahead lngdpc_lag

* Column 1 absorbs industry, country and year effects and overwrites table12;
* Column 2 absorbs firm and year effects and is appended.
global tabb1 "outreg2 using table12, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach fe in "icb4 market year" "dscd year" {
	$tabb1 xi: reghdfe implied_vol `rhs', absorb(`fe') cluster(dscd)
	global tabb1 "outreg2 using table12, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}


****** Table IA.1 ******
*** We follow the same methodology as in Table 3

****** Table IA.3 ******
*** We follow the same methodology as in Panel F of Table 5

****** Table IA.4 ******
* Table IA.4: summary statistics, saved through logout as IA4 (Excel, 4 decimals).
* The local macros below live only for one run, so execute this block as a whole.
use pseudo_dataset, clear

local vars io_lag manager_forecast_lag twitter
local stats n mean sd p5 p25 median p75 p95
logout, dec(4) save(IA4) excel replace: tabstat `vars', stats(`stats') column(statistics)

****** Table IA.5 ******
* Table IA.5: regressions after excluding nine markets, exported to IA5.
* The local macro below lives only for one run, so execute this block as a whole.
use pseudo_dataset, clear

* Remove the excluded markets one at a time.
foreach excluded in "China" "Pakistan" "Germany" "Greece" "Indonesia" "Norway" "South Korea" "Turkey" "Hong Kong" {
	drop if market == "`excluded'"
}

local rhs newscov_lag sentiment_lag analyst_lag optimism_lag io_lag manager_forecast_lag manager_optimism_lag ifrs_lag twitter rvar_lag btm_lag lev_lag size_lag disp_lag infl_ahead lngdpc_lag

* The first regression overwrites IA5; tabb1 is switched to append right after it.
global tabb1 "outreg2 using IA5, excel replace bdec(4) tdec(2) e(r2 r2_a) tstat: "
foreach depvar of varlist r_gls r_hl {
	$tabb1 xi: reghdfe `depvar' `rhs', absorb(icb4 market year) cluster(dscd)
	global tabb1 "outreg2 using IA5, excel append bdec(4) tdec(2) e(r2 r2_a) tstat: "
}

****** Table IA.6 ******
//For each country, we perform the baseline regressions in Table 3



********************************************************************************
********************************************************************************
